package study.elasticsearch.util;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import study.elasticsearch.vo.JdHtmlContent;

import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/**
 * Utility for scraping product listings from the JD search result page.
 */
public class HtmlParseUtil {

    /** Base search URL; the URL-encoded keyword is appended to it. */
    private static final String SEARCH_URL_PREFIX = "https://search.jd.com/Search?keyword=";

    /** Identifier of the element that wraps the product list on the result page. */
    private static final String GOODS_LIST_ID = "J_goodsList";

    /** Timeout, in milliseconds, passed to jsoup when fetching the page. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Manual smoke test: searches for a sample keyword and prints every parsed item.
     *
     * @param args ignored
     * @throws Exception if fetching or parsing the page fails
     */
    public static void main(String[] args) throws Exception {
        parseJD("鞋子男").forEach(System.out::println);
    }

    /**
     * Fetches the JD search result page for the given keyword and extracts the
     * image, price and title of every {@code <li>} found inside the goods list.
     *
     * @param keyWord the search keyword; it is URL-encoded as UTF-8 before being
     *                placed in the query string
     * @return the parsed items, in document order; an empty list when the page
     *         does not contain the goods list container
     * @throws IllegalArgumentException if {@code keyWord} is {@code null}
     * @throws Exception if the URL cannot be built or the page cannot be fetched
     */
    public static List<JdHtmlContent> parseJD(String keyWord) throws Exception {
        if (keyWord == null) {
            throw new IllegalArgumentException("keyWord must not be null");
        }

        // Encode the keyword so that non-ASCII text, spaces and reserved
        // characters such as '&' or '#' cannot corrupt the query string.
        String url = SEARCH_URL_PREFIX + URLEncoder.encode(keyWord, StandardCharsets.UTF_8.name());

        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

        List<JdHtmlContent> list = new ArrayList<>();

        // getElementById returns null when the container is missing (for example
        // when a different page is served); report "no results" instead of
        // failing with a NullPointerException.
        Element goodsList = document.getElementById(GOODS_LIST_ID);
        if (goodsList == null) {
            return list;
        }

        Elements elements = goodsList.getElementsByTag("li");
        for (Element el : elements) {
            // Images are lazy-loaded, so the address is read from the
            // "data-lazy-img" attribute rather than from "src".
            String image = el.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = el.getElementsByClass("p-price").eq(0).text();
            String title = el.getElementsByClass("p-name").eq(0).text();

            JdHtmlContent item = new JdHtmlContent();
            item.setImg(image);
            item.setTitle(title);
            item.setPrice(price);
            list.add(item);
        }
        return list;
    }
}
